library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(scatterplot3d)
# Restore the saved workspace objects from disk. The code that follows uses
# `TFR_long` and `IMR_long`, so these files are expected to provide them.
load(file = "TFR_long.Rdata")
load(file = "IMR_long.RData")
# Sanity check before merging: collect the IMR rows that have no matching
# TFR row. The key set (country_name, country_code, year) is the same one
# used to join the two tables, so a mismatch in any key column is caught
# here instead of silently producing NA values in the joined table.
miss <- anti_join(
  IMR_long,
  TFR_long,
  by = c("country_name", "country_code", "year")
)

# Number of unmatched IMR rows; 0 means every IMR row has a TFR partner.
nrow(miss)
## [1] 0
# Attach the TFR values to the IMR table. Every IMR row is kept (left join)
# and rows are matched on country name, country code and year.
join_keys <- c("country_name", "country_code", "year")
IMR_TFR <- left_join(IMR_long, TFR_long, by = join_keys)

# Print a compact overview of the joined table's columns.
glimpse(IMR_TFR)
## Rows: 16,226
## Columns: 5
## $ country_name <chr> "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Ar…
## $ country_code <chr> "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "…
## $ year <dbl> 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 196…
## $ IMR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ TFR <dbl> 4.820, 4.655, 4.471, 4.271, 4.059, 3.842, 3.625, 3.417, 3…
# Scatter plot of TFR (y) against IMR (x), with observations grouped by
# country and drawn as very small points.
g <- ggplot(
  data = IMR_TFR,
  mapping = aes(x = IMR, y = TFR, group = country_name)
) +
  geom_point(size = 0.1)

# Convert the static ggplot into an interactive plotly widget.
ggplotly(g)
The scatter plot shows a dense chunk of data at the start, which makes it difficult to examine and explain the data. To improve on the graph, let us apply a few more data visualization techniques.
Let us start.
# Keep only the observations where both indicators are high:
# IMR above 100 and TFR above 7 (both conditions must hold).
data <- filter(IMR_TFR, IMR > 100, TFR > 7)

# Print a compact overview of the filtered subset.
glimpse(data)
## Rows: 546
## Columns: 5
## $ country_name <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan…
## $ country_code <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "…
## $ year <dbl> 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 197…
## $ IMR <dbl> 237.5, 233.2, 229.2, 225.3, 221.5, 217.7, 213.8, 209.9, 2…
## $ TFR <dbl> 7.284, 7.292, 7.302, 7.304, 7.305, 7.320, 7.339, 7.363, 7…
# 3D scatter plot of year, IMR and TFR for the filtered observations.
# Passing the tibble directly made scatterplot3d() emit
# "Unknown or uninitialised column: `color`", so the selection is converted
# to a plain data.frame first. Columns are picked by name rather than by
# position (3:5) so the call does not depend on the column order.
scatterplot3d(
  as.data.frame(data[, c("year", "IMR", "TFR")]),
  angle = 75
)
# IMR against TFR for the filtered subset, drawn as a lightly shaded area
# with a line layer on top.
data %>%
  ggplot(mapping = aes(x = TFR, y = IMR)) +
  geom_area(alpha = 0.1) +
  geom_line()
# 2D-binned view of IMR against TFR for the filtered subset; each
# rectangle's fill reflects how many observations fall into that bin.
data %>%
  ggplot(mapping = aes(x = TFR, y = IMR)) +
  geom_bin2d() +
  theme_bw()